This script tries to find out which drugs are able to significantly alter indel pattern formation in RSTP2#5 cells in different chromatin contexts. In the end, the aim is to couple biological relevance to these changes.
How to make a good rendering table:
| column1 | column2 | column3 |
|---|---|---|
| 1 | 2 | 3 |
| a | b | c |
knitr::opts_chunk$set(echo = TRUE)
StartTime <-Sys.time()
# 8-digit Date tag:
Date <- substr(gsub("-","",Sys.time()),1,8)
# libraries:
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.0.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## ========================================
## ========================================
## circlize version 0.4.6
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
## ========================================
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Loading required package: graph
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unsplit, which,
## which.max, which.min
##
## Attaching package: 'graph'
## The following object is masked from 'package:circlize':
##
## degree
##
## Loading required package: futile.logger
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:VennDiagram':
##
## rotate
##
## Attaching package: 'Laurae'
## The following object is masked from 'package:data.table':
##
## setDF
Functions used thoughout this script.
# Import data from 5_HitValidation_DrugScreen.Rmd
setwd("/DATA/usr/m.trauernicht/projects/EpiScreen/epigenetic-screening-on-trip-clone/Data")
mapping_mean <- get(load("mt20190510_mapping_mean"))
indel.data <- get(load("mt20190502_indel.data"))
mapping_mean_long <- get(load("mt20190510_mapping_mean_long"))
chip_dendogram <- get(load("mt20190510_chip_dendogram"))
mapping_mean_all <- get(load("mt20190329_mapping_mean_all"))
barcode.order <- read.table("barcode_order.txt", header = T)
setwd("/DATA/projects/DSBrepair/scratch/")
delay.df <- read.table("cl20190329_minus7_delay.tsv", header = T)# Split up per concentration
indel.data.high <- indel.data[grep("10um", indel.data$condition),]
indel.data.med <- indel.data[grep("1um", indel.data$condition),]
indel.data.low <- indel.data[grep("100nm", indel.data$condition),]
# Compute mean of logratio per barcode and drug
indel.data.high$mean.logratio.bc.drug <-
ave(indel.data.high$logratio.platenorm,
indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.med$mean.logratio.bc.drug <-
ave(indel.data.med$logratio.platenorm,
indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.low$mean.logratio.bc.drug <-
ave(indel.data.low$logratio.platenorm,
indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))
# Compute mean of logratio per barcode and target
indel.data.high$mean.logratio.bc.target <-
ave(indel.data.high$logratio.platenorm,
indel.data.high$barcode, indel.data.high$Target, FUN = function(x) mean(x))
indel.data.med$mean.logratio.bc.target <-
ave(indel.data.med$logratio.platenorm,
indel.data.med$barcode, indel.data.med$Target, FUN = function(x) mean(x))
indel.data.low$mean.logratio.bc.target <-
ave(indel.data.low$logratio.platenorm,
indel.data.low$barcode, indel.data.low$Target, FUN = function(x) mean(x))
# Compute mean of t-statistic per barcode and target
indel.data.high$mean.statistic.bc.target <-
ave(indel.data.high$statistic,
indel.data.high$barcode, indel.data.high$Target, FUN = function(x) mean(x))
indel.data.med$mean.statistic.bc.target <-
ave(indel.data.med$statistic,
indel.data.med$barcode, indel.data.med$Target, FUN = function(x) mean(x))
indel.data.low$mean.statistic.bc.target <-
ave(indel.data.low$statistic,
indel.data.low$barcode, indel.data.low$Target, FUN = function(x) mean(x))
# Compute mean of t-statistic per barcode and drug
indel.data.high$mean.statistic.bc.drug <-
ave(indel.data.high$statistic,
indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.med$mean.statistic.bc.drug <-
ave(indel.data.med$statistic,
indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.low$mean.statistic.bc.drug <-
ave(indel.data.low$statistic,
indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))
# Compute mean of efficiency per barcode and drug
indel.data.high$mean.eff.bc.drug <-
ave(indel.data.high$efficiency.platenorm,
indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.med$mean.eff.bc.drug <-
ave(indel.data.med$efficiency.platenorm,
indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.low$mean.eff.bc.drug <-
ave(indel.data.low$efficiency.platenorm,
indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))
# Compute mean and sd of barcode counts per barcode and drug
indel.data.high$mean.count.bc.drug <-
ave(indel.data.high$rel.counts,
indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) mean(x))
indel.data.high$sd.count.bc.drug <-
ave(indel.data.high$rel.counts,
indel.data.high$barcode, indel.data.high$Drug, FUN = function(x) sd(x))
indel.data.med$mean.count.bc.drug <-
ave(indel.data.med$rel.counts,
indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) mean(x))
indel.data.med$sd.count.bc.drug <-
ave(indel.data.med$rel.counts,
indel.data.med$barcode, indel.data.med$Drug, FUN = function(x) sd(x))
indel.data.low$mean.count.bc.drug <-
ave(indel.data.low$rel.counts,
indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) mean(x))
indel.data.low$sd.count.bc.drug <-
ave(indel.data.low$rel.counts,
indel.data.low$barcode, indel.data.low$Drug, FUN = function(x) sd(x))# Remove columns
indel.data.high.eff <- indel.data.high[,c(1,6,45)]
indel.data.high.eff <- indel.data.high.eff[!duplicated(indel.data.high.eff), ]
indel.data.high.eff <- dcast(indel.data.high.eff, barcode ~ Drug, value.var = "mean.eff.bc.drug")
# Remove columns
indel.data.med.eff <- indel.data.med[,c(1,6,45)]
indel.data.med.eff <- indel.data.med.eff[!duplicated(indel.data.med.eff), ]
indel.data.med.eff <- dcast(indel.data.med.eff, barcode ~ Drug, value.var = "mean.eff.bc.drug")
# Remove columns
indel.data.low.eff <- indel.data.low[,c(1,6,45)]
indel.data.low.eff <- indel.data.low.eff[!duplicated(indel.data.low.eff), ]
indel.data.low.eff <- dcast(indel.data.low.eff, barcode ~ Drug, value.var = "mean.eff.bc.drug")# Remove columns
indel.data.high.count <- indel.data.high[indel.data.high$sd.count.bc.drug < 1,]
indel.data.high.count <- indel.data.high.count[,c(1,6,46)]
indel.data.high.count <- indel.data.high.count[!duplicated(indel.data.high.count), ]
indel.data.high.count <- dcast(indel.data.high.count, barcode ~ Drug, value.var = "mean.count.bc.drug")
indel.data.high.count <- indel.data.high.count[,-118]
indel.data.med.count <- indel.data.med[indel.data.med$sd.count.bc.drug < 1,]
indel.data.med.count <- indel.data.med.count[,c(1,6,46)]
indel.data.med.count <- indel.data.med.count[!duplicated(indel.data.med.count), ]
indel.data.med.count <- dcast(indel.data.med.count, barcode ~ Drug, value.var = "mean.count.bc.drug")
indel.data.med.count <- indel.data.med.count[,-146]
indel.data.low.count <- indel.data.low[indel.data.low$sd.count.bc.drug < 1,]
indel.data.low.count <- indel.data.low.count[,c(1,6,46)]
indel.data.low.count <- indel.data.low.count[!duplicated(indel.data.low.count), ]
indel.data.low.count <- dcast(indel.data.low.count, barcode ~ Drug, value.var = "mean.count.bc.drug")
indel.data.low.count <- indel.data.low.count[,-158]# Remove columns
indel.data.high.drug.st <- indel.data.high[,c(1,6,44)]
indel.data.med.drug.st <- indel.data.med[,c(1,6,44)]
indel.data.low.drug.st <- indel.data.low[,c(1,6,44)]
indel.data.high.drug.st <- indel.data.high.drug.st[!duplicated(indel.data.high.drug.st), ]
indel.data.med.drug.st <- indel.data.med.drug.st[!duplicated(indel.data.med.drug.st), ]
indel.data.low.drug.st <- indel.data.low.drug.st[!duplicated(indel.data.low.drug.st), ]
# Transform to wide format for visualization purposes
indel.data.low.drug.st <-
dcast(indel.data.low.drug.st, barcode ~ Drug, value.var="mean.statistic.bc.drug")
indel.data.med.drug.st <-
dcast(indel.data.med.drug.st, barcode ~ Drug, value.var="mean.statistic.bc.drug")
indel.data.high.drug.st <-
dcast(indel.data.high.drug.st, barcode ~ Drug, value.var="mean.statistic.bc.drug")# Remove columns
indel.data.high.drug <- indel.data.high[,c(1,6,41)]
indel.data.med.drug <- indel.data.med[,c(1,6,41)]
indel.data.low.drug <- indel.data.low[,c(1,6,41)]
indel.data.high.drug <-
indel.data.high.drug[!duplicated(indel.data.high.drug), ]
indel.data.med.drug <-
indel.data.med.drug[!duplicated(indel.data.med.drug), ]
indel.data.low.drug <-
indel.data.low.drug[!duplicated(indel.data.low.drug), ]
# Transform to wide format for visualization purposes
indel.data.low.drug <-
dcast(indel.data.low.drug,
barcode ~ Drug, value.var="mean.logratio.bc.drug")
indel.data.med.drug <-
dcast(indel.data.med.drug,
barcode ~ Drug, value.var="mean.logratio.bc.drug")
indel.data.high.drug <-
dcast(indel.data.high.drug,
barcode ~ Drug, value.var="mean.logratio.bc.drug")# Create matrix and use heatmap function
indel.data.high.drug.st <- indel.data.high.drug.st %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.med.drug.st <- indel.data.med.drug.st %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.low.drug.st <- indel.data.low.drug.st %>%
remove_rownames %>% column_to_rownames(var="barcode")# Create matrix and use heatmap function
indel.data.high.count <- na.omit(indel.data.high.count)
indel.data.high.count <- indel.data.high.count %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.med.count <- na.omit(indel.data.med.count)
indel.data.med.count <- indel.data.med.count %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.low.count <- na.omit(indel.data.low.count)
indel.data.low.count <- indel.data.low.count %>%
remove_rownames %>% column_to_rownames(var="barcode")# Add target to logratio df: high concentration
target.high <- indel.data.high[,c(6,7)]
target.high <- target.high[!duplicated(target.high), ]
indel.data.high.drug2 <- melt(indel.data.high.drug)## Using barcode as id variables
indel.data.high.drug2 <- dcast(indel.data.high.drug2, variable ~ barcode, value.var = "value")
setnames(target.high, old = "Drug", new = "variable")
indel.data.high.drug2 <- merge(indel.data.high.drug2, target.high)
indel.data.high.drug2 <- indel.data.high.drug2 %>%
remove_rownames %>% column_to_rownames(var="variable")
data2 <- indel.data.high.drug2[-grep("Negative control|MRN|DNA-PK",indel.data.high.drug2$Target),]
# Add target to logratio df: med concentration
target.med <- indel.data.med[,c(6,7)]
target.med <- target.med[!duplicated(target.med), ]
indel.data.med.drug2 <- melt(indel.data.med.drug)## Using barcode as id variables
indel.data.med.drug2 <- dcast(indel.data.med.drug2, variable ~ barcode, value.var = "value")
setnames(target.med, old = "Drug", new = "variable")
indel.data.med.drug2 <- merge(indel.data.med.drug2, target.med)
indel.data.med.drug2 <- indel.data.med.drug2 %>%
remove_rownames %>% column_to_rownames(var="variable")
data3 <- indel.data.med.drug2[-grep("Negative control|MRN|DNA-PK",indel.data.med.drug2$Target),]
# Add target to logratio df: low concentration
target.low <- indel.data.low[,c(6,7)]
target.low <- target.low[!duplicated(target.low), ]
indel.data.low.drug2 <- melt(indel.data.low.drug)## Using barcode as id variables
indel.data.low.drug2 <- dcast(indel.data.low.drug2, variable ~ barcode, value.var = "value")
setnames(target.low, old = "Drug", new = "variable")
indel.data.low.drug2 <- merge(indel.data.low.drug2, target.low)
indel.data.low.drug2 <- indel.data.low.drug2 %>%
remove_rownames %>% column_to_rownames(var="variable")
data4 <- indel.data.low.drug2[-grep("Negative control|MRN|DNA-PK",indel.data.low.drug2$Target),]
# Add target to efficiency df: high concentration
target.high.eff <- indel.data.high[,c(6,7)]
target.high.eff <- target.high.eff[!duplicated(target.high.eff), ]
indel.data.high.drug2 <- melt(indel.data.high.eff)## Using barcode as id variables
indel.data.high.drug2 <- dcast(indel.data.high.drug2, variable ~ barcode, value.var = "value")
setnames(target.high.eff, old = "Drug", new = "variable")
indel.data.high.drug2 <- merge(indel.data.high.drug2, target.high.eff)
indel.data.high.drug2 <- indel.data.high.drug2 %>%
remove_rownames %>% column_to_rownames(var="variable")
data5 <- indel.data.high.drug2[-grep("Negative control|MRN|DNA-PK",indel.data.high.drug2$Target),]
# Add target to efficiency df: med concentration
target.med.eff <- indel.data.med[,c(6,7)]
target.med.eff <- target.med.eff[!duplicated(target.med.eff), ]
indel.data.med.drug2 <- melt(indel.data.med.eff)## Using barcode as id variables
indel.data.med.drug2 <- dcast(indel.data.med.drug2, variable ~ barcode, value.var = "value")
setnames(target.med.eff, old = "Drug", new = "variable")
indel.data.med.drug2 <- merge(indel.data.med.drug2, target.med.eff)
indel.data.med.drug2 <- indel.data.med.drug2 %>%
remove_rownames %>% column_to_rownames(var="variable")
data6 <- indel.data.med.drug2[-grep("Negative control|MRN|DNA-PK",indel.data.med.drug2$Target),]
# Add target to efficiency df: low concentration
target.low.eff <- indel.data.low[,c(6,7)]
target.low.eff <- target.low.eff[!duplicated(target.low.eff), ]
indel.data.low.drug2 <- melt(indel.data.low.eff)## Using barcode as id variables
indel.data.low.drug2 <- dcast(indel.data.low.drug2, variable ~ barcode, value.var = "value")
setnames(target.low.eff, old = "Drug", new = "variable")
indel.data.low.drug2 <- merge(indel.data.low.drug2, target.low.eff)
indel.data.low.drug2 <- indel.data.low.drug2 %>%
remove_rownames %>% column_to_rownames(var="variable")
data7 <- indel.data.low.drug2[-grep("Negative control|MRN|DNA-PK",indel.data.low.drug2$Target),]
# Heatmap per barcode
indel.data.high.drug <- indel.data.high.drug %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.med.drug <- indel.data.med.drug %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.low.drug <- indel.data.low.drug %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.high.eff <- indel.data.high.eff %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.med.eff <- indel.data.med.eff %>%
remove_rownames %>% column_to_rownames(var="barcode")
indel.data.low.eff <- indel.data.low.eff %>%
remove_rownames %>% column_to_rownames(var="barcode")# Row annotation: annotate drugs with the target group
target <- indel.data[,c(6,7)]
target <- target[!duplicated(target),]
target <- target[target$Target != c("Negative Control", "MRN", "DNA-PK"),]## Warning in `!=.default`(target$Target, c("Negative Control", "MRN", "DNA-
## PK")): longer object length is not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
target$random <- c(1:156)
row.names(target) <- target$Drug
target2 <- target
target <- target[,c(-1,-3)]
target <- as.data.frame(target)
row.names(target) <- target2$Drug
# Add type of chr. mark (repressive or active)
type <- mapping_mean_long[,c(2,4)]
type <- unique(type)
type$random <- c(1:17)
row.names(type) <- type$variable
type2 <- type
type <- type[,c(-1,-3)]
type <- as.data.frame(type)
row.names(type) <- type2$variable
# Add annotation of barcodes (based on clustering in 0_Clone5_Annotation.Rmd)
barcode.annotation <- as.data.frame(indel.data.high[,1])
barcode.annotation <- unique(barcode.annotation)
barcode.annotation <- barcode.annotation %>%
remove_rownames %>% column_to_rownames(var="indel.data.high[, 1]")
barcode.annotation$cluster[c(18,2)] <- "Active Promoter/Enhancer"
barcode.annotation$cluster[c(12,8)] <- "H3K9me2/3 Domain"
barcode.annotation$cluster[c(3,14,4,17)] <- "Active Gene Body"
barcode.annotation$cluster[c(5,13,6,15)] <- "LAD"
barcode.annotation$cluster[c(7,16)] <- "No Marks Present"
barcode.annotation$cluster[c(1,10,9,11)] <- "H3K27me3 Domain"
# Change colors of annotations
colors <- brewer.pal(length(unique(barcode.annotation$cluster)), "Pastel2")
names(colors) <- unique(barcode.annotation$cluster)
annotation_colors_scr = list(
cluster = colors,
type = c(Active="#669966", Repressive="grey90"),
target = c(HDAC="#60988D", HAT="#C6D0A8", Sirtuin="#8FCAAC",
HMT="#FDCDAC", DNMT="#D69C81", `Histone Demethylase`="#FFF2AE",
HIF="#E3DCD5", JAK="#C8D7E2", PIM="#9AB8C4", `Aurora Kinase`="#F4CAE4",
PARP="#CB546F", `Epigenetic Reader Domain`= "#476D61", `DNA-PK`="#E07A43"
))
# Order the annotation legend
ordered_marks <- c("Active Promoter/Enhancer", "Active Gene Body", "No Marks Present", "H3K27me3 Domain", "H3K9me2/3 Domain", "LAD")
annotation_colors_scr$cluster <- annotation_colors_scr$cluster[ordered_marks]
ordered_marks2 <- c("DNA-PK","Epigenetic Reader Domain", "HDAC", "Sirtuin", "HAT",
"Histone Demethylase", "HMT", "DNMT",
"HIF", "JAK", "PIM",
"Aurora Kinase", "PARP")
annotation_colors_scr$target <- annotation_colors_scr$target[ordered_marks2]Next, I want to create heatmaps to visualize the effect of each individual drug on each of the 18 barcodes.
## This heatmap contains too much information -> We need to decrease the number of drugs ## Only drugs that change the logratio
## Read count heatmap
# Only select HDAC data, 1 uM concentration
# data2 = logratio_high, data3 = logratio_med, data4 = logratio_low,
# data5 = efficiency_high, data6 = efficiency_med, data7 = efficiency_low
hdac.logratio <- data7[data7$Target=="HDAC",]
hdac.logratio <- hdac.logratio[,-19]
hdac.logratio <- t(hdac.logratio)
# Generate pheatmap
# Use myBreaks5 for efficiency heatmap, myBreaks1 for logratio heatmap
pheatmap(as.matrix(hdac.logratio),
annotation_col = target,
annotation_row = barcode.annotation,annotation_legend = F,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
breaks = myBreaks5, cluster_rows = chip_dendogram$tree_col,
annotation_colors = annotation_colors_scr,
cellwidth = 14, cellheight = 14,
labels_row = c("Integration 15","Integration 1","Integration 5","Integration 6",
"Integration 10","Integration 9","Integration 13","Integration 3",
"Integration 17","Integration 16","Integration 18", "Integration 4",
"Integration 11","Integration 7","Integration 12", "Integration 14",
"Integration 8","Integration 2"),
main = "logratio change - 1uM of drugs added - HDACs only")## (polygon[GRID.polygon.194], polygon[GRID.polygon.195], polygon[GRID.polygon.196], polygon[GRID.polygon.197], text[GRID.text.198], text[GRID.text.199], text[GRID.text.200], text[GRID.text.201], text[GRID.text.202])
Next step is to import the chromatin data to generate a correlation matrix between the logratio change of each drug and the chromatin data. A correlation is calculated by comparing logratio changes of drug A for each barcode with ChIP data of ChIP dataset A for each barcode etc.
## Using ChIP as id variables
# There are some interesting drug-induced changes that can be explained by certain chromatin marks - this can be looked at in more detail
clusters <- as.data.frame(unique(indel.data$barcode))
clusters$cluster <- "LAD"
clusters$cluster[clusters$`unique(indel.data$barcode)` == "AGAAAATAATATGACG" |
clusters$`unique(indel.data$barcode)` == "TTGAACGCGGGCTCGG"] <- "Active_P.E."
clusters$cluster[clusters$`unique(indel.data$barcode)` == "CCGGGGACGTATGCAC" |
clusters$`unique(indel.data$barcode)` == "GCTAACATCACGAATC"] <- "H3K9"
clusters$cluster[clusters$`unique(indel.data$barcode)` == "AGGGCGTAAAATATTT" |
clusters$`unique(indel.data$barcode)` == "ATACTATATTTAACGG"|
clusters$`unique(indel.data$barcode)` == "TATGGCTGTCGGGTAG"|
clusters$`unique(indel.data$barcode)` == "TGTCCCTTAGTACTTT"] <- "Active_G.B."
clusters$cluster[clusters$`unique(indel.data$barcode)` == "CATTTCTGATCAATAA" |
clusters$`unique(indel.data$barcode)` == "TGGCCAATATTTGTCT"] <- "No"
clusters$cluster[clusters$`unique(indel.data$barcode)` == "ACTGTCGAGTTGTCCG" |
clusters$`unique(indel.data$barcode)` == "GAGCGCGTCACCGGGT"|
clusters$`unique(indel.data$barcode)` == "CGGCCTGAAGGTCAGG"|
clusters$`unique(indel.data$barcode)` == "GCGCACCCTTTAATTG"] <- "H3K27"
clusters2 <- dcast(clusters, `unique(indel.data$barcode)`~cluster)## Using 'cluster' as value column. Use 'value.var' to override
clusters2 <- clusters2 %>%
remove_rownames %>% column_to_rownames(var="unique(indel.data$barcode)")
clusters2[!is.na(clusters2)] <- 1
clusters2[is.na(clusters2)] <- 0
clusters2$Active_G.B. <- as.numeric(clusters2$Active_G.B.)
clusters2$Active_P.E. <- as.numeric(clusters2$Active_P.E.)
clusters2$H3K27 <- as.numeric(clusters2$H3K27)
clusters2$H3K9 <- as.numeric(clusters2$H3K9)
clusters2$LAD <- as.numeric(clusters2$LAD)
clusters2$No <- as.numeric(clusters2$No)
cor.clusters.high <- cor(clusters2, indel.data.high.drug)
cor.clusters.med <- cor(clusters2, indel.data.med.drug)
cor.clusters.low <- cor(clusters2, indel.data.low.drug)
cor.clusters.high.eff <- cor(clusters2, indel.data.high.eff)
cor.clusters.med.eff <- cor(clusters2, indel.data.med.eff)
cor.clusters.low.eff <- cor(clusters2, indel.data.low.eff)
# Take only drugs that have correlation of absolute > 0.5 with at least one ChIP dataset
cor.clusters.high <-
cor.clusters.high[,
colSums(cor.clusters.high > 0.6) >= 1 |
colSums(cor.clusters.high < -0.6) >= 1]
cor.clusters.med <-
cor.clusters.med[,
colSums(cor.clusters.med > 0.6) >= 1 |
colSums(cor.clusters.med < -0.6) >= 1]
cor.clusters.low <-
cor.clusters.low[,
colSums(cor.clusters.low > 0.6) >= 1 |
colSums(cor.clusters.low < -0.6) >= 1]
cor.clusters.high.eff <-
cor.clusters.high.eff[,
colSums(cor.clusters.high.eff > 0.6) >= 1 |
colSums(cor.clusters.high.eff < -0.6) >= 1]
cor.clusters.med.eff <-
cor.clusters.med.eff[,
colSums(cor.clusters.med.eff > 0.6) >= 1 |
colSums(cor.clusters.med.eff < -0.6) >= 1]
cor.clusters.low.eff <-
cor.clusters.low.eff[,
colSums(cor.clusters.low.eff > 0.6) >= 1 |
colSums(cor.clusters.low.eff < -0.6) >= 1]
# plotting the correlations
cor.pheatmap <-
list(cor.clusters.high, cor.clusters.med, cor.clusters.low)
cor.pheatmap.eff <-
list(cor.clusters.high.eff, cor.clusters.med.eff, cor.clusters.low.eff)
for(i in 1:(length(cor.pheatmap))) {
data <- cor.pheatmap[[i]]
p <- pheatmap(data,
annotation_col = target,
annotation_row = type,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
annotation_colors = annotation_colors_scr, annotation_legend = F,
main = paste(i,"correlation heatmap: ChiP data vs. logratio change per drug"))
print(p)
}for(i in 1:(length(cor.pheatmap.eff))) {
data <- cor.pheatmap.eff[[i]]
p <- pheatmap(data,
annotation_col = target,
annotation_row = type,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
annotation_colors = annotation_colors_scr, annotation_legend = F,
main = paste(i,"correlation heatmap: ChiP data vs. efficiency change per drug"))
print(p)
}# Boxplot
mean.ratio <- indel.data[indel.data$Drug == "CTPB" & indel.data$conc == "10um",]
mean.ratio <- mean.ratio[,c(1,25)]
mean.ratio$logratio.platenorm <- ave(mean.ratio$logratio.platenorm, mean.ratio$barcode, FUN = function(x) mean(x))
mean.ratio <- unique(mean.ratio)
mean.ratio <- mean.ratio %>%
remove_rownames %>% column_to_rownames(var="barcode")
# Combine dfs
clusters <- clusters %>%
remove_rownames %>% column_to_rownames(var="unique(indel.data$barcode)")
mean.ratio2 <- cbind(mean.ratio, clusters)
# Boxplot
ggplot(mean.ratio2, aes(x=cluster, y=logratio.platenorm, fill = cluster))+
geom_boxplot()+
scale_fill_brewer(palette = "Pastel2")+
scale_y_continuous(breaks=c(-0.5,0,0.5))+ theme_classic2()## Statistical analysis: are the groups significantly different?
mean.ratio <- indel.data[indel.data$Drug == "AT9283" & indel.data$conc == "1um",]
mean.ratio <- mean.ratio[,c(1,12,25)]
mean.ratio$logratio.platenorm <- ave(mean.ratio$logratio.platenorm, mean.ratio$rep, mean.ratio$barcode, FUN = function(x) mean(x))
mean.ratio <- unique(mean.ratio)
clusters2 <- tibble::rownames_to_column(as.data.frame(clusters), "barcode")
mean.ratio <- merge(mean.ratio, clusters2)
t.test(mean.ratio$logratio.platenorm[mean.ratio$cluster == "Active_G.B."],
mean.ratio$logratio.platenorm[mean.ratio$cluster == "Active_P.E."])$p.value## [1] 0.08515303
## Df Sum Sq Mean Sq F value Pr(>F)
## cluster 5 2.370 0.4741 5.77 0.000759 ***
## Residuals 30 2.465 0.0822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## In case I want to include the mean.ratio
mean.ratio <- indel.data[indel.data$Drug == "Decitabine",]
mean.ratio <- mean.ratio[,c(1,25)]
mean.ratio$logratio.platenorm <- ave(mean.ratio$logratio.platenorm, mean.ratio$barcode, FUN = function(x) mean(x))
mean.ratio <- unique(mean.ratio)
## Ratio is influenced by which ChIP datasets?
mean.ratio2 <- mean.ratio
mean.ratio2 <- mean.ratio2 %>%
remove_rownames %>% column_to_rownames(var="barcode")
# chip_data <- chip_data %>%
# remove_rownames %>% column_to_rownames(var="rowname")
cor_ratio <- cor(mean.ratio2, chip_data)
cor_ratio <- as.data.frame(cor_ratio)
cor_ratio <- t(cor_ratio)
cor_ratio <- tibble::rownames_to_column(as.data.frame(cor_ratio), "ChIP")
ggplot(cor_ratio, aes(x = reorder(ChIP,logratio.platenorm, function(x) -x), y = logratio.platenorm, fill=logratio.platenorm)) +
geom_bar(stat="identity") +
xlab("ChIP Dataset") + ylab("Pearson Correlation") +
scale_fill_gradient2(low = "#D67143", high = "#8EBF7A",
name = "Pearson
Correlation")+
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12))+
labs(title = "Correlation MC-1293 (1 uM) Logratio Change per Barcode vs. Each ChIP Dataset")# Scatterplot
mean.ratio <- tibble::rownames_to_column(as.data.frame(mean.ratio2))
chip_data <- tibble::rownames_to_column(as.data.frame(chip_data))
hdac1.data <- chip_data[,c(1,18)]
hdac.1.data <- cbind(hdac1.data, mean.ratio2)
ggplot(hdac.1.data, aes(x=WGBS, y=logratio.platenorm))+
geom_point()+
geom_smooth(method = lm) +
theme_classic2()## In case I want to include the mean.ratio
mean.ratio <- indel.data[indel.data$Drug == "AR-42" | indel.data$Drug == "Vorinostat (SAHA, MK0683)" |
indel.data$Drug == "PCI-24781 (Abexinostat)" |
indel.data$Drug == "Givinostat (ITF2357)" |
indel.data$Drug == "Scriptaid" |
indel.data$Drug == "DMSO" & indel.data$conc == "1um",]
mean.ratio <- mean.ratio[,c(1,6,25)]
mean.ratio$logratio.platenorm <- ave(mean.ratio$logratio.platenorm, mean.ratio$Drug, mean.ratio$barcode, FUN = function(x) mean(x))
mean.ratio <- unique(mean.ratio)
setnames(mean.ratio, "barcode", "Var2")
## Ratio is influenced by which ChIP datasets?
mean.ratio2 <- mean.ratio
mean.ratio2 <- dcast(mean.ratio2, Var2 ~ Drug)## Using 'logratio.platenorm' as value column. Use 'value.var' to override
mean.ratio2 <- mean.ratio2 %>%
remove_rownames %>% column_to_rownames(var="Var2")
mean.ratio2 <- hdac[,colSums(hdac > 1) >= 1 | colSums(hdac < -0.7) >= 1]
chip_data <- chip_data %>%
remove_rownames %>% column_to_rownames(var="rowname")
cor_ratio <- cor(mean.ratio2, chip_data)
cor_ratio <- as.data.frame(cor_ratio)
cor_ratio <- t(cor_ratio)
cor_ratio <- tibble::rownames_to_column(as.data.frame(cor_ratio), "ChIP")
cor_ratio <- melt(cor_ratio)## Using ChIP as id variables
ggplot(cor_ratio, aes(x = reorder(ChIP, value, function(x) -mean(x)), y = value, fill = value)) +
geom_boxplot() +
geom_jitter(aes(color=variable))+
xlab("ChIP Dataset") + ylab("Pearson Correlation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12))+
labs(title = "Correlation HDACs Logratio Change per Barcode vs. Each ChIP Dataset")# Scatterplot
chip_data <- tibble::rownames_to_column(as.data.frame(chip_data))
h3k27.data <- chip_data[,c(1,4)]
h3k27.data <- cbind(h3k27.data, mean.ratio2)
h3k27.data <- melt(h3k27.data, id.vars = c("rowname","H3K27me3"))
ggplot(h3k27.data, aes(x=H3K27me3, y=value, color = variable))+
geom_point()+
geom_smooth(method = lm) +
scale_color_brewer(palette = "Set2")+
theme_classic2()# mc1293.indel.data <- indel.data.med.drug[,73, drop = F]
# mc1293.indel.data <- tibble::rownames_to_column(as.data.frame(mc1293.indel.data), "barcode")
# barcode.order2 <- barcode.order[,-2]
# mc1293.indel.data <- merge(mc1293.indel.data, barcode.order2)
# mc1293.indel.data <- mc1293.indel.data[,-1]
#
# chip_data2 <- chip_data[,10, drop = F]
# chip_data2 <- tibble::rownames_to_column(as.data.frame(chip_data2), "barcode")
# chip_data2 <- merge(chip_data2, barcode.order2)
# chip_data2 <- chip_data2[,-1]
#
#
#
# mc1293.hdac1 <- merge(mc1293.indel.data, chip_data2)
# mc1293.hdac1 <- melt(mc1293.hdac1)
#
# ggplot(mc1293.hdac1, aes(x = reorder(integration, value), y = value, fill = variable, group = variable))+
# geom_bar(stat="identity", position = "dodge")+
# theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12))## [1] "Run time: 16.3733 secs"
## [1] "/DATA/usr/m.trauernicht/projects/EpiScreen/epigenetic-screening-on-trip-clone"
## [1] "Tue Sep 3 16:05:55 2019"
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] Laurae_0.0.0.9001 ggpubr_0.2 magrittr_1.5
## [4] tibble_2.1.1 ggforce_0.2.2 VennDiagram_1.6.20
## [7] futile.logger_1.4.3 tidyr_0.8.3 Pigengene_1.10.0
## [10] graph_1.62.0 BiocGenerics_0.30.0 RColorBrewer_1.1-2
## [13] gridExtra_2.3 gridGraphics_0.4-0 pheatmap_1.0.12
## [16] circlize_0.4.6 ComplexHeatmap_2.0.0 data.table_1.12.2
## [19] ggbeeswarm_0.6.0 ggplot2_3.1.1 outliers_0.14
## [22] dplyr_0.8.1
##
## loaded via a namespace (and not attached):
## [1] Cubist_0.2.2 colorspace_1.4-1 rjson_0.2.20
## [4] dynamicTreeCut_1.63-1 htmlTable_1.13.1 GlobalOptions_0.1.0
## [7] base64enc_0.1-3 clue_0.3-57 rstudioapi_0.10
## [10] farver_1.1.0 bit64_0.9-7 AnnotationDbi_1.46.0
## [13] mvtnorm_1.0-10 codetools_0.2-16 splines_3.6.1
## [16] doParallel_1.0.14 impute_1.58.0 robustbase_0.93-5
## [19] libcoin_1.0-4 knitr_1.22 polyclip_1.10-0
## [22] Formula_1.2-3 WGCNA_1.67 cluster_2.0.9
## [25] GO.db_3.8.2 png_0.1-7 rrcov_1.4-7
## [28] compiler_3.6.1 backports_1.1.4 assertthat_0.2.1
## [31] Matrix_1.2-17 lazyeval_0.2.2 tweenr_1.0.1
## [34] formatR_1.6 acepack_1.4.1 htmltools_0.3.6
## [37] tools_3.6.1 partykit_1.2-3 gtable_0.3.0
## [40] glue_1.3.1 reshape2_1.4.3 Rcpp_1.0.1
## [43] Biobase_2.44.0 preprocessCore_1.46.0 iterators_1.0.10
## [46] inum_1.0-1 xfun_0.7 fastcluster_1.1.25
## [49] stringr_1.4.0 DEoptimR_1.0-8 MASS_7.3-51.4
## [52] scales_1.0.0 lambda.r_1.2.3 yaml_2.2.0
## [55] C50_0.1.2 memoise_1.1.0 rpart_4.1-15
## [58] latticeExtra_0.6-28 stringi_1.4.3 RSQLite_2.1.1
## [61] S4Vectors_0.22.0 pcaPP_1.9-73 foreach_1.4.4
## [64] checkmate_1.9.3 shape_1.4.4 rlang_0.3.4
## [67] pkgconfig_2.0.2 matrixStats_0.54.0 evaluate_0.13
## [70] lattice_0.20-38 purrr_0.3.2 labeling_0.3
## [73] htmlwidgets_1.3 bit_1.1-14 tidyselect_0.2.5
## [76] robust_0.4-18 plyr_1.8.4 R6_2.4.0
## [79] IRanges_2.18.0 Hmisc_4.2-0 fit.models_0.5-14
## [82] DBI_1.0.0 pillar_1.4.0 foreign_0.8-71
## [85] withr_2.1.2 survival_2.44-1.1 nnet_7.3-12
## [88] crayon_1.3.4 bnlearn_4.4.1 futile.options_1.0.1
## [91] rmarkdown_1.12 GetoptLong_0.1.7 blob_1.1.1
## [94] Rgraphviz_2.28.0 digest_0.6.18 stats4_3.6.1
## [97] munsell_0.5.0 beeswarm_0.2.3 vipor_0.4.5